library(ggplot2)
library(dplyr)
# Load the tree measurement records from the course dataset folder.
tree_data <- read.csv(
  file = "G:/My Drive/Postdoc DE-TUD/FOSTER Project/Classes Material/DATASETS Phase III/tree_measurements.csv"
)
# Add each stem's basal area in cm²: the area of a circle whose diameter
# is `diameter_cm`.
tree_data <- mutate(tree_data, basal_area_cm2 = pi * (diameter_cm / 2)^2)
# Mean basal area (cm²) per species; the result has one row per species.
basal_summary <- summarise(
  group_by(tree_data, species),
  mean_basal_area = mean(basal_area_cm2)
)
# Bar chart of the per-species mean basal area.
ggplot(data = basal_summary, mapping = aes(species, mean_basal_area)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Average Basal Area by Species",
    x = "Species",
    y = "Basal Area (cm²)"
  )
# Stem volume in cm³ under a simplified cylinder model: circular
# cross-section (cm²) times height. `height_m` is multiplied by 100 to
# convert metres to centimetres so the units are consistent.
tree_data <- mutate(
  tree_data,
  volume_cm3 = pi * (diameter_cm / 2)^2 * height_m * 100
)
# Detect outliers within each species using the 1.5 * IQR rule: a tree is
# flagged when its volume lies more than 1.5 * IQR below the first quartile
# or above the third quartile of its own species.
# na.rm = TRUE keeps a single missing volume from aborting quantile(), which
# raises an error on NA input by default. Rows whose volume is NA make the
# condition NA and are dropped by filter(), so they are never reported as
# outliers.
# ungroup() hands back an ungrouped result so that later verbs applied to
# `outliers` do not silently operate per species.
outliers <- tree_data %>%
  group_by(species) %>%
  filter(
    volume_cm3 < quantile(volume_cm3, 0.25, na.rm = TRUE) -
      1.5 * IQR(volume_cm3, na.rm = TRUE) |
      volume_cm3 > quantile(volume_cm3, 0.75, na.rm = TRUE) +
        1.5 * IQR(volume_cm3, na.rm = TRUE)
  ) %>%
  ungroup()
# Per-species volume boxplots with the individual trees jittered on top.
# The boxplot's own outlier markers are suppressed (outlier.shape = NA)
# because the jittered points already show every tree; trees whose tree_id
# appears in `outliers` are drawn in red, all others in black.
ggplot(data = tree_data, mapping = aes(species, volume_cm3)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(
    mapping = aes(color = tree_id %in% outliers$tree_id),
    alpha = 0.7,
    width = 0.2
  ) +
  scale_color_manual(
    guide = "none",
    values = c("TRUE" = "red", "FALSE" = "black")
  ) +
  labs(
    y = "Volume (cm³)",
    title = "Volume per Tree by Species with Outliers Highlighted"
  )
# Height against diameter for every tree; point size encodes volume and
# point colour encodes species.
ggplot(
  data = tree_data,
  mapping = aes(
    x = diameter_cm,
    y = height_m,
    color = species,
    size = volume_cm3
  )
) +
  geom_point(alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Tree Size Relationship",
    size = "Volume (cm³)",
    y = "Height (m)",
    x = "Diameter (cm)"
  )
# Compute a `volume` column: circular cross-section from `diameter_cm`
# multiplied by `height_m` as stored, i.e. without the x100 factor that
# `volume_cm3` applies.
# NOTE(review): the result is therefore in mixed units (cm² * m), a factor
# of 100 smaller than `volume_cm3` — confirm this is intended.
tree_data$volume <- pi * (tree_data$diameter_cm / 2)^2 * tree_data$height_m
# `$` returns NULL for a column name that does not exist, so the name read
# here must match the column assigned above exactly.
summary(tree_data$volume)
# Print the inputs, the recomputed expression, and the stored column so the
# values can be compared by eye.
tree_data$diameter_cm
tree_data$height_m
pi * (tree_data$diameter_cm / 2)^2 * tree_data$height_m
tree_data$volume
library(microbenchmark)
